hw7_p2

Author

Ryan Klein

Import Packages

import altair as alt
import pandas as pd
from altair import datum
# Turn off Altair's row-count limit so the full dataset can be passed to the charts.
alt.data_transformers.disable_max_rows()
DataTransformerRegistry.enable('default')

Part 4

# Load the dataset from the course site.
democracy_url = "https://calvin-data304.netlify.app/data/wvs.csv"
democracy_data = pd.read_csv(democracy_url)

# 10 where the respondent gave the top rating of 10, otherwise 0, so the mean
# of this column is ten times the share of respondents who answered 10.
democracy_data['tens_only'] = (democracy_data['democracy_importance'] == 10).astype(int)*10

# Line layer: mean of the indicator within each age group.
lines = alt.Chart(democracy_data).mark_line().encode(
    x=alt.X(field="age6", type="ordinal", sort="-x"),
    # The aggregate has to be given separately from the field name: a value
    # passed through the `field=` keyword is not parsed as shorthand, so
    # "mean(tens_only)" would be treated as a literal (non-existent) column.
    y=alt.Y(
        field="tens_only",
        aggregate="mean",
        type="quantitative",
        title="Average importance of democracy",
    ),
).properties(
    width=200, height=400,
    title=""
)

# Band layer: error band computed by the errorband mark from the raw indicator.
bands = alt.Chart(democracy_data).mark_errorband().encode(
    x=alt.X(field="age6", type="ordinal", sort="-x", title="Age Grouping"),
    y=alt.Y(field="tens_only", type="quantitative", title="Average importance of democracy"),
).properties(
    width=200, height=400,
    title=""
)

# Layer the two charts directly (no nested layer) and draw one panel per country.
alt.layer(lines, bands).facet(
    facet="country:O"
)

Part 5

# Line layer: mean rating per age group, with the aggregate spelled out as keywords.
lines = (
    alt.Chart(democracy_data)
    .mark_line()
    .encode(
        x=alt.X("age6:O", sort="-x"),
        y=alt.Y(
            field="democracy_importance",
            aggregate="mean",
            type="quantitative",
        ),
    )
    .properties(height=400, width=200)
)

# Band layer: the errorband mark summarises the raw ratings for each age group.
bands = (
    alt.Chart(democracy_data)
    .mark_errorband()
    .encode(
        x=alt.X("age6:O", sort="-x", title="Age Grouping"),
        y=alt.Y("democracy_importance:Q", title="Average importance of democracy"),
    )
    .properties(title="", height=400, width=200)
)

# Overlay both layers, then split the result into one panel per country.
alt.layer(lines + bands).facet(facet="country:O")

Part 6

# Line layer: mean rating for each distinct value of the age column (treated as ordinal).
lines = (
    alt.Chart(democracy_data)
    .mark_line()
    .encode(
        x=alt.X("age:O", sort="-x"),
        y=alt.Y(
            field="democracy_importance",
            aggregate="mean",
            type="quantitative",
        ),
    )
    .properties(height=400, width=200)
)

# Band layer: the errorband mark summarises the raw ratings at each age value.
bands = (
    alt.Chart(democracy_data)
    .mark_errorband()
    .encode(
        x=alt.X("age:O", sort="-x", title="Age Grouping"),
        y=alt.Y("democracy_importance:Q", title="Average importance of democracy"),
    )
    .properties(title="", height=400, width=200)
)

# Overlay both layers, then split the result into one panel per country.
alt.layer(lines + bands).facet(facet="country:O")

Using “age” instead of “age6” makes the graphic worse, because the statistics are then calculated separately for every individual age present in the dataset. “age6”, on the other hand, is useful because it groups the cases into six bins, each defined by an age range. This makes the plot much less chaotic and much more interpretable.

Part 7

# Shared encodings and sizing for the smoothed chart; the mark is chosen below.
loess_chart_base = (
    alt.Chart(democracy_data)
    .encode(
        x=alt.X("age6:O", sort="-x", title="Age Grouping"),
        y=alt.Y("democracy_importance:Q", title="Average importance of democracy"),
    )
    .properties(title="", height=400, width=200)
)

# Smooth democracy_importance over age6 with a loess transform, draw it as a
# line, and show one panel per country.
# NOTE(review): loess fits against a numeric predictor, while age6 is encoded
# as ordinal here; confirm age6 holds numeric values, otherwise the smoother
# may need the raw age column instead.
(
    loess_chart_base
    .transform_loess(on="age6", loess="democracy_importance")
    .mark_line()
    .facet(facet="country:O")
)